aniketp2009gmail committed on
Commit 13e45fb · verified · 1 Parent(s): 6d4fab8

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +137 -0
  2. requirements.txt +8 -2
app.py ADDED
@@ -0,0 +1,137 @@
+ import streamlit as st
+ import torch
+ import os
+ import datetime
+ import pandas as pd
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel
+ from huggingface_hub import HfApi
+
+ # CONFIGURATION
+ BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct"
+ ADAPTER_REPO = "aniketp2009gmail/phi3-bilora-code-review"
+ FEEDBACK_DATASET = "aniketp2009gmail/code-review-feedback"
+
+ st.set_page_config(page_title="BiLoRA Code Assistant", page_icon="🧠")
+
+ # Load the base model and both LoRA adapters from the Hub
+ @st.cache_resource(show_spinner="Loading model (this takes ~2 mins on CPU)...")
+ def load_model():
+     tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO)
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+     tokenizer.padding_side = "right"
+
+     # fp16 halves memory; if fp16 ops fail on your CPU/torch build, fall back to float32
+     model = AutoModelForCausalLM.from_pretrained(
+         BASE_MODEL,
+         torch_dtype=torch.float16,
+         low_cpu_mem_usage=True,
+         trust_remote_code=False,
+         attn_implementation="eager"
+     )
+
+     # Attach both task adapters; task_1 is active by default
+     model = PeftModel.from_pretrained(model, ADAPTER_REPO, subfolder="task_1", adapter_name="task_1")
+     model.load_adapter(ADAPTER_REPO, subfolder="task_2", adapter_name="task_2")
+     model.set_adapter("task_1")
+     return model, tokenizer
+
+ def log_feedback(prompt, response, task, rating):
+     """Save feedback as a CSV file in a Hugging Face Dataset repo."""
+     try:
+         hf_token = st.secrets.get("HF_TOKEN") or os.environ.get("HF_TOKEN")
+         if not hf_token:
+             st.error("HF_TOKEN not found. Feedback not saved.")
+             return
+
+         # Build a one-row DataFrame for the new entry
+         new_data = {
+             "timestamp": [str(datetime.datetime.now())],
+             "task": [task],
+             "prompt": [prompt],
+             "response": [response],
+             "rating": [rating]
+         }
+         df = pd.DataFrame(new_data)
+
+         api = HfApi(token=hf_token)
+         csv_data = df.to_csv(index=False)
+
+         # Create the feedback dataset repo if it doesn't exist
+         api.create_repo(repo_id=FEEDBACK_DATASET, repo_type="dataset", exist_ok=True)
+
+         # One file per entry: upload_file overwrites files of the same name,
+         # so reusing a per-day filename would keep only the last entry.
+         filename = f"feedback_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.csv"
+
+         # For simplicity in Streamlit, we use upload_file with run_as_future=True
+         # so the UI isn't blocked. In high-traffic apps, you'd use a more robust
+         # queue (see the CommitScheduler sketch after this diff).
+         api.upload_file(
+             path_or_fileobj=csv_data.encode(),
+             path_in_repo=filename,
+             repo_id=FEEDBACK_DATASET,
+             repo_type="dataset",
+             run_as_future=True
+         )
+         st.session_state.feedback_sent = True
+     except Exception as e:
+         st.error(f"Feedback error: {e}")
+
+ # UI
+ st.title("🧠 BiLoRA: Dual-Adapter Code Assistant")
+
+ try:
+     model, tokenizer = load_model()
+
+     task_option = st.sidebar.radio(
+         "Select Task:",
+         options=["task_1", "task_2"],
+         format_func=lambda x: "Code Generation" if x == "task_1" else "Docstring Generation"
+     )
+
+     user_input = st.text_area("Input:", height=150)
+
+     if st.button("Generate"):
+         if user_input.strip():
+             with st.spinner("Generating..."):
+                 # Activate the LoRA adapter that matches the selected task
+                 model.set_adapter(task_option)
+                 prefix = "Generate code: " if task_option == "task_1" else "Generate docstring: "
+                 suffix = "\nCode: " if task_option == "task_1" else "\nDocstring: "
+
+                 full_prompt = f"{prefix}{user_input}{suffix}"
+                 inputs = tokenizer(full_prompt, return_tensors="pt")
+
+                 with torch.no_grad():
+                     outputs = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.7)
+
+                 # Decode only the newly generated tokens, not the echoed prompt
+                 input_length = inputs["input_ids"].shape[1]
+                 output_text = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
+
+                 st.session_state.last_prompt = user_input
+                 st.session_state.last_response = output_text
+                 st.session_state.last_task = task_option
+                 st.session_state.generated = True
+
+     if st.session_state.get("generated"):
+         st.subheader("Result:")
+         st.code(st.session_state.last_response, language="python")
+
+         st.write("---")
+         st.write("Help improve BiLoRA! Was this helpful?")
+         col1, col2 = st.columns(2)
+         with col1:
+             if st.button("👍 Yes"):
+                 log_feedback(st.session_state.last_prompt, st.session_state.last_response, st.session_state.last_task, 1)
+                 st.success("Thanks!")
+         with col2:
+             if st.button("👎 No"):
+                 log_feedback(st.session_state.last_prompt, st.session_state.last_response, st.session_state.last_task, 0)
+                 st.error("Feedback received.")
+
+ except Exception as e:
+     st.error(f"Error: {e}")
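
Each feedback click above creates its own Hub commit via upload_file, which the code itself flags as a simplification. For higher traffic, huggingface_hub provides CommitScheduler, which appends rows to a local file and pushes the folder in batched background commits. A minimal sketch of that pattern, assuming a local feedback/ staging folder (the folder, file, and function names here are illustrative, not part of this commit):

    from pathlib import Path
    from huggingface_hub import CommitScheduler

    feedback_file = Path("feedback") / "feedback.csv"  # hypothetical local staging file
    feedback_file.parent.mkdir(parents=True, exist_ok=True)

    # Push the contents of feedback/ to the dataset repo every 5 minutes
    # from a background thread, instead of one commit per button click.
    scheduler = CommitScheduler(
        repo_id="aniketp2009gmail/code-review-feedback",
        repo_type="dataset",
        folder_path="feedback",
        every=5,  # minutes between scheduled commits
    )

    def log_feedback_row(csv_row: str) -> None:
        # The scheduler's lock prevents writing while an upload is in flight
        with scheduler.lock:
            with feedback_file.open("a") as f:
                f.write(csv_row + "\n")

With this pattern, log_feedback would only append a line locally; the scheduler handles authentication, retries, and commit batching.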
requirements.txt CHANGED
@@ -1,3 +1,9 @@
- altair
+ streamlit
+ torch
+ transformers
+ peft
+ accelerate
+ sentencepiece
+ protobuf
+ huggingface_hub
  pandas
- streamlit
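
The new requirements are unpinned, so a Space rebuild can silently pull breaking releases. A pinned variant may be safer; the versions below are assumptions (releases known to exist, not pins tested against this app) and should be adjusted to whatever the Space was last built with:

    streamlit==1.32.0
    torch==2.1.2
    transformers==4.41.2
    peft==0.10.0
    accelerate==0.27.2
    sentencepiece==0.1.99
    protobuf==4.25.3
    huggingface_hub==0.23.0
    pandas==2.2.1

Note that trust_remote_code=False in app.py relies on native Phi-3 support in transformers, which requires a sufficiently recent release.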